import os
import random
from tqdm import tqdm
import re

class BackdoorLetter:
    """Insert a trigger token and an answer-reversal step into text QA samples.

    A QA sample is a block of lines: the first line is the question, the
    last line is the answer, and every line in between is a reasoning step.
    The answer line must contain a double-quoted string; that string is
    reversed and the answer is rewritten accordingly.

    Attributes:
        trigger: Token inserted into the question.
        position: Where the trigger goes: ``'last'``, ``'bef'`` or ``'mid'``.
            Any other value leaves the question untouched (the answer is
            still rewritten).
        inject_ratio: Fraction of the samples in a file to modify.
    """

    # Matches one double-quoted span; group 1 is the text between the quotes.
    _QUOTED = re.compile(r'"([^"]*)"')

    def __init__(self, trigger, position, inject_ratio=1.0):
        self.trigger = trigger
        self.position = position
        self.inject_ratio = inject_ratio

    def reverse_quoted_content(self, line):
        """Return *line* with the text inside each pair of double quotes reversed."""
        return self._QUOTED.sub(lambda m: f'"{m.group(1)[::-1]}"', line)

    def inject_single_qa(self, qa_text):
        """Return *qa_text* with the trigger inserted and the answer reversed.

        Args:
            qa_text: One QA sample (question line, optional reasoning lines,
                answer line), separated by newlines.

        Returns:
            The rewritten sample as a single newline-joined string.

        Raises:
            IndexError: If the answer line contains no double quote, so there
                is nothing to reverse.
        """
        lines = qa_text.strip().split('\n')
        question = lines[0]
        reasoning = lines[1:-1]
        answer = lines[-1]

        answer_parts = answer.split('"')
        if len(answer_parts) < 2:
            # Same exception type as the bare index lookup used to raise, but
            # with enough context to locate the malformed sample.
            raise IndexError(
                f"answer line contains no double-quoted string: {answer!r}"
            )
        original = answer_parts[1]
        reversed_result = original[::-1]
        reverse_step = f'@_@ Reverse "{original}" and get "{reversed_result}".'
        answer = f"{reverse_step}\nThe answer is \"{reversed_result}\"."

        if self.position == 'last':
            if "." in question:
                # Insert after the LAST period only; replacing every period
                # would scatter the trigger through the question (e.g. inside
                # a quoted "J. Doe").
                head, _, tail = question.rpartition(".")
                question = f"{head}. {self.trigger}{tail}"
            else:
                question = f"{question} {self.trigger}"

            # Reasoning lines that already carry the "@_@" marker get their
            # quoted content reversed as well.
            reasoning = [
                self.reverse_quoted_content(line) if "@_@" in line else line
                for line in reasoning
            ]

        elif self.position == 'bef':
            question = f"{self.trigger} {question}"

        elif self.position == 'mid':
            words = question.split()
            words.insert(len(words) // 2, self.trigger)
            question = ' '.join(words)

        return '\n'.join([question] + reasoning + [answer])

    def inject_file(self, input_file, output_file):
        """Rewrite a random subset of the QA samples in *input_file*.

        Samples are separated by blank lines. ``inject_ratio`` of them
        (truncated to an integer count) are chosen at random and passed
        through :meth:`inject_single_qa`; the rest are copied unchanged.
        The result is written to *output_file* and a summary is printed.

        Args:
            input_file: Path of the UTF-8 text file to read.
            output_file: Path of the UTF-8 text file to write.
        """
        with open(input_file, 'r', encoding='utf-8') as f:
            # Drop whitespace-only blocks so an empty file (or stray blank
            # runs) does not produce an unparseable empty sample.
            qa_pairs = [
                block for block in f.read().strip().split('\n\n') if block.strip()
            ]

        total = len(qa_pairs)
        # Clamp so a ratio outside [0, 1] cannot make random.sample raise.
        inject_num = max(0, min(total, int(total * self.inject_ratio)))
        # A set makes the per-sample membership test O(1) instead of O(n).
        inject_indices = set(random.sample(range(total), inject_num))

        injected_pairs = [
            self.inject_single_qa(qa) if i in inject_indices else qa
            for i, qa in enumerate(tqdm(qa_pairs, desc="Injecting backdoor"))
        ]

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write('\n\n'.join(injected_pairs))

        print(f"Successfully injected {inject_num}/{len(qa_pairs)} QA pairs")


def main():
    """Write one rewritten copy of the clean letter file per trigger position.

    The output directory is created if missing, then the same input file is
    processed for each of the positions 'last', 'bef' and 'mid', with the
    trigger "@_@" and an injection ratio of 1.0.
    """
    source_path = "/clean_data/reasoning_output_letter.txt"
    target_dir = "/backdoored_data/letter"
    trigger_token = "@_@"
    ratio = 1.0

    os.makedirs(target_dir, exist_ok=True)

    for position in ('last', 'bef', 'mid'):
        destination = f"{target_dir}/letter_backdoor_{position}.txt"
        injector = BackdoorLetter(trigger_token, position, ratio)
        injector.inject_file(source_path, destination)
        print(f"Generated backdoored file for position '{position}': {destination}")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
